################################################
# Replication file for Haber and Menaldo 2011
# Xiaoyu Li
# Updated: April, 2014
################################################


# Clear workspace
rm(list=ls())

# Set working directory
setwd("H:/Dropbox/G2001/Haber and Menaldo Data and Do File/HM RAW DATA")

# Require libraries
library(foreign)
library(zoo)
library(lmtest)
library(Formula)
library(plm)
library(car)


############
## DATA 
############


# Load data
DATA <- read.dta("H:/Dropbox/G2001/Haber and Menaldo Data and Do File/HM RAW DATA/Haber_Menaldo_2011_APSR_Dataset_Final.dta")
names(DATA)

# Subset data
var <- c("hmccode", "year","D_polity_s_interp", "L_polity_s_interp","L_Log_tot_oil_inc_interp","D_Log_tot_oil_inc_interp","L_LogPerCapGDP_interp",
         "L_CivilWar_interp","L_REGION_DEM_DIFFUSE","D_LogperCapGDP_int","D_Region_Dem_Diffuse","L_tot_oil_inc_interp","D_tot_oil_inc_interp",
         "L_WORLD_DEM_DIFFUSE","D_World_Dem_Diffuse","Has_oil_inc","L_has_oil_inc","D_has_oil_inc","TOI_INC","Log_TOI_INC")
subset <- DATA[var]

# Delete observations with missing values
subset <- na.omit(subset)

# Set as panel data
E <- pdata.frame(subset, index = c("hmccode","year"), drop.index = FALSE, row.names = TRUE)

# Create dummies for country and year
E$year.f<-factor(E$year)
E$hmccode.f<-factor(E$hmccode)
names(E)


################
# Estimation
################


###################
# original model without world variables
HM <-   plm(D_polity_s_interp ~ L_polity_s_interp + L_tot_oil_inc_interp +
             D_tot_oil_inc_interp + L_LogPerCapGDP_interp  + L_CivilWar_interp +
             L_REGION_DEM_DIFFUSE + D_LogperCapGDP_int  +
             D_Region_Dem_Diffuse +E$year.f+E$hmccode.f,
            data=E, model="pooling", index = c("year","hmccode"),vcov = vcovSCC) 

# Residual plot
HM_residuals <- HM$residuals
HM_fit <- predict(HM)
plot(E$L_tot_oil_inc_interp, HM_residuals)

# Results
HM_res <- cbind(summary(HM)$coefficients[,1:4])
HM_res <- HM_res[2:11,]
HM_res

# Long-run Multiplier classical vs robust
deltaMethod(HM, "-L_tot_oil_inc_interp/L_polity_s_interp")
deltaMethod(HM, "-L_tot_oil_inc_interp/L_polity_s_interp", vcov=vcovSCC(HM, type="HC1", maxlag=1))

# Coefficients test with robust SE
HM_test <- coeftest(HM, vcov=function(x) vcovSCC(x, type="HC1", maxlag=1))[2:11,]
HM_test

# SE comparisons
HM_se_com <- HM_res[,2]/HM_test[,2]
HM_se_com

#######################################
# original model with world variables

# year and two world variables (sd = 0 within years)
library(plyr)
ddply(E, .(year.f), summarise, 
      L_World_mean = mean(L_WORLD_DEM_DIFFUSE),
      L_World_sd = sd(L_WORLD_DEM_DIFFUSE),
      D_World_mean = mean(D_World_Dem_Diffuse),
      D_World_sd = sd(D_World_Dem_Diffuse))

# model
HM_w <-   plm(D_polity_s_interp ~ L_polity_s_interp + L_tot_oil_inc_interp +
              D_tot_oil_inc_interp + L_LogPerCapGDP_interp  + L_CivilWar_interp +
              L_REGION_DEM_DIFFUSE + D_LogperCapGDP_int  +
              D_Region_Dem_Diffuse + L_WORLD_DEM_DIFFUSE + D_World_Dem_Diffuse + E$year.f+E$hmccode.f
            , data=E, model="pooling", index = c("year","hmccode"),vcov = vcovSCC) 

# summary(HM_w);
# Error in crossprod(t(X), beta) : non-conformable arguments


##########################
# Logged income model
Log <-   plm(D_polity_s_interp ~ L_polity_s_interp + L_Log_tot_oil_inc_interp +
                 D_Log_tot_oil_inc_interp +  L_LogPerCapGDP_interp  + L_CivilWar_interp +
                 L_REGION_DEM_DIFFUSE + D_LogperCapGDP_int  +
                 D_Region_Dem_Diffuse +E$year.f+E$hmccode.f
               , data=E, model="pooling", index = c("year","hmccode"),vcov = vcovSCC) 

# Residual plot
Log_residuals <- Log$residuals
Log_fit <- predict(Log)
plot(E$L_Log_tot_oil_inc_interp, Log_residuals)

# Results
Log_res <- cbind(summary(Log)$coefficients[,1:4])
Log_res <- Log_res[2:11,]
Log_res

# Long-run Multiplier classical vs robust
deltaMethod(Log, "-L_Log_tot_oil_inc_interp/L_polity_s_interp")
deltaMethod(Log, "-L_Log_tot_oil_inc_interp/L_polity_s_interp", vcov=vcovSCC(Log, type="HC1", maxlag=1))

# Coefficients test with robust SE
Log_test <- coeftest(Log, vcov=function(x) vcovSCC(x, type="HC1", maxlag=1))[2:11,]
Log_test

# SE comparisons
Log_se_com <- Log_res[,2]/Log_test[,2]
Log_se_com

##########################
# Logged income model with an income dummy
Log_d <-   plm(D_polity_s_interp ~ L_polity_s_interp + L_Log_tot_oil_inc_interp +
               D_Log_tot_oil_inc_interp +  L_LogPerCapGDP_interp  + L_CivilWar_interp +
               L_REGION_DEM_DIFFUSE + D_LogperCapGDP_int  +
               D_Region_Dem_Diffuse + L_has_oil_inc +E$year.f+E$hmccode.f
             , data=E, model="pooling", index = c("year","hmccode"),vcov = vcovSCC) 

# Residual plot
Log_d_residuals <- Log_d$residuals
Log_d_fit <- predict(Log_d)
plot(Log_d_fit, Log_d_residuals)
plot(E$L_Log_tot_oil_inc_interp, Log_d_residuals)
plot(E$L_tot_oil_inc_interp, HM_residuals)


plot(E$L_Log_tot_oil_inc_interp[E$Has_oil_inc==1], Log_d_residuals[E$Has_oil_inc==1])
plot(E$L_tot_oil_inc_interp[E$Has_oil_inc==1], Log_d_residuals[E$Has_oil_inc==1])

# Results
Log_d_res <- cbind(summary(Log_d)$coefficients[,1:4])
Log_d_res <- Log_d_res[2:11,]
Log_d_res

# Long-run Multiplier classical vs robust
deltaMethod(Log_d, "-L_Log_tot_oil_inc_interp/L_polity_s_interp")
deltaMethod(Log_d, "-L_Log_tot_oil_inc_interp/L_polity_s_interp", vcov=vcovSCC(Log_d, type="HC1", maxlag=1))

# Coefficients test with robust SE
Log_d_test <- coeftest(Log_d, vcov=function(x) vcovSCC(x, type="HC1", maxlag=1))[2:11,]
Log_d_test

# SE comparisons
Log_d_se_com <- Log_d_res[,2]/Log_d_test[,2]
Log_d_se_com

cbind(HM_se_com, Log_se_com, Log_d_se_com)
cbind(Log_test[,1:3],Log_d_test[,1:3])

#########################
# Histogram of income
pdf("Hist_Inc_All.pdf")
par(mfrow=c(1,2))
hist(E$TOI_INC, 
     xlab = 'Total Oil Income Per Capita in Thousands of 2007 Dollars, \n All Countries', 
     main = 'Total Oil Income')
hist(E$Log_TOI_INC, 
     xlab = 'Transformed Total Oil Income Per Capita, \n All Countries', 
     main = 'Transformed Total Oil Income')

pdf("Hist_Inc_HasIncome.pdf")
par(mfrow=c(1,2))
hist(E$TOI_INC[E$TOI_INC>0], 
     xlab = 'Total Oil Income Per Capita in Thousands of 2007 Dollars, \n Excluding Countries with No Oil Income', 
     main = 'Total Oil Income')
hist(E$Log_TOI_INC[E$TOI_INC>0], 
     xlab = 'Transformed Total Oil Income Per Capita,\n Excluding Countries with No Oil Income', 
     main = 'Tranformed Total Oil Income')
graphics.off()
